Extend perdomain_pt to support multiple vcpus: move the per-domain page table
from per-exec-domain (ed->mm.perdomain_pt) to per-domain (d->mm_perdomain_pt),
giving each vcpu its own slice of PTEs (ed->mm.perdomain_ptes) and making
GDT_VIRT_START/LDT_VIRT_START per-vcpu. Also add a per-domain "big lock"
(LOCK_BIGLOCK/UNLOCK_BIGLOCK) serialising guest pagetable/descriptor
operations.
c->ldt_base = d->mm.ldt_base;
c->ldt_ents = d->mm.ldt_ents;
c->gdt_ents = 0;
- if ( GET_GDT_ADDRESS(d) == GDT_VIRT_START )
+ if ( GET_GDT_ADDRESS(d) == GDT_VIRT_START(d) )
{
for ( i = 0; i < 16; i++ )
c->gdt_frames[i] =
- l1_pgentry_to_pagenr(d->mm.perdomain_pt[i]);
+ l1_pgentry_to_pagenr(d->mm.perdomain_ptes[i]);
c->gdt_ents = GET_GDT_ENTRIES(d);
}
c->guestos_ss = d->thread.guestos_ss;
void startup_cpu_idle_loop(void)
{
/* Just some sanity to ensure that the scheduler is set up okay. */
- ASSERT(current->id == IDLE_DOMAIN_ID);
+ ASSERT(current->domain->id == IDLE_DOMAIN_ID);
domain_unpause_by_systemcontroller(current->domain);
__enter_scheduler();
__machine_halt(NULL);
}
-void free_perdomain_pt(struct exec_domain *d)
+void free_perdomain_pt(struct domain *d)
{
- free_xenheap_page((unsigned long)d->mm.perdomain_pt);
+ free_xenheap_page((unsigned long)d->mm_perdomain_pt);
}
void arch_do_createdomain(struct exec_domain *ed)
machine_to_phys_mapping[virt_to_phys(d->shared_info) >>
PAGE_SHIFT] = INVALID_P2M_ENTRY;
- ed->mm.perdomain_pt = (l1_pgentry_t *)alloc_xenheap_page();
- memset(ed->mm.perdomain_pt, 0, PAGE_SIZE);
- machine_to_phys_mapping[virt_to_phys(ed->mm.perdomain_pt) >>
+ d->mm_perdomain_pt = (l1_pgentry_t *)alloc_xenheap_page();
+ memset(d->mm_perdomain_pt, 0, PAGE_SIZE);
+ machine_to_phys_mapping[virt_to_phys(d->mm_perdomain_pt) >>
PAGE_SHIFT] = INVALID_P2M_ENTRY;
+ ed->mm.perdomain_ptes = d->mm_perdomain_pt;
}
int arch_final_setup_guestos(struct exec_domain *d, full_execution_context_t *c)
l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR);
l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(ed->mm.perdomain_pt) | __PAGE_HYPERVISOR);
+ mk_l2_pgentry(__pa(p->mm_perdomain_pt) | __PAGE_HYPERVISOR);
ed->mm.pagetable = mk_pagetable((unsigned long)l2start);
l2tab += l2_table_offset(dsi.v_start);
for ( i = 16; i < 32; i++ )
{
- pfn = l1_pgentry_to_pagenr(d->mm.perdomain_pt[i]);
+ pfn = l1_pgentry_to_pagenr(d->mm.perdomain_ptes[i]);
if ( pfn == 0 ) continue;
- d->mm.perdomain_pt[i] = mk_l1_pgentry(0);
+ d->mm.perdomain_ptes[i] = mk_l1_pgentry(0);
page = &frame_table[pfn];
ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page);
- ASSERT_PAGE_IS_DOMAIN(page, d);
+ ASSERT_PAGE_IS_DOMAIN(page, d->domain);
put_page_and_type(page);
}
d, PGT_ldt_page)) )
return 0;
- ed->mm.perdomain_pt[off + 16] = mk_l1_pgentry(l1e | _PAGE_RW);
+ ed->mm.perdomain_ptes[off + 16] = mk_l1_pgentry(l1e | _PAGE_RW);
ed->mm.shadow_ldt_mapcnt++;
return 1;
pl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
mk_l2_pgentry((page_nr << PAGE_SHIFT) | __PAGE_HYPERVISOR);
pl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(page->u.inuse.domain->exec_domain[0]->mm.perdomain_pt) |
+ mk_l2_pgentry(__pa(page->u.inuse.domain->mm_perdomain_pt) |
__PAGE_HYPERVISOR);
#endif
* See domain.c:relinquish_list().
*/
ASSERT((x & PGT_validated) ||
- test_bit(DF_DYING, &page->u.inuse.domain->flags));
+ test_bit(DF_DYING, &page->u.inuse.domain->d_flags));
if ( unlikely((nx & PGT_count_mask) == 0) )
{
perfc_incrc(calls_to_mmu_update);
perfc_addc(num_page_updates, count);
+ LOCK_BIGLOCK(d);
+
cleanup_writable_pagetable(d, PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE);
- if ( unlikely(!access_ok(VERIFY_READ, ureqs, count * sizeof(req))) )
+ if ( unlikely(!access_ok(VERIFY_READ, ureqs, count * sizeof(req))) ) {
+ UNLOCK_BIGLOCK(d);
return -EFAULT;
+ }
for ( i = 0; i < count; i++ )
{
if ( unlikely(success_count != NULL) )
put_user(count, success_count);
+ UNLOCK_BIGLOCK(d);
return rc;
}
if ( unlikely(page_nr >= (HYPERVISOR_VIRT_START >> PAGE_SHIFT)) )
return -EINVAL;
+ LOCK_BIGLOCK(d);
+
cleanup_writable_pagetable(d, PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE);
/*
if ( unlikely(deferred_ops & DOP_RELOAD_LDT) )
(void)map_ldt_shadow_page(0);
+ UNLOCK_BIGLOCK(d);
+
return err;
}
/* Ensure that there are no stale writable mappings in any TLB. */
/* NB. INVLPG is a serialising instruction: flushes pending updates. */
+#if 0
__flush_tlb_one(l1va); /* XXX Multi-CPU guests? */
+#else
+ flush_tlb_all();
+#endif
PTWR_PRINTK("[%c] disconnected_l1va at %p now %08lx\n",
PTWR_PRINT_WHICH, ptep, pte);
(ENTRIES_PER_L1_PAGETABLE - i) * sizeof(l1_pgentry_t));
unmap_domain_mem(pl1e);
ptwr_info[cpu].ptinfo[which].l1va = 0;
+ UNLOCK_BIGLOCK(d);
domain_crash();
}
l2_pgentry_t *pl2e, nl2e;
int which, cpu = smp_processor_id();
u32 l2_idx;
+ struct domain *d = current->domain;
+ LOCK_BIGLOCK(d);
/*
* Attempt to read the PTE that maps the VA being accessed. By checking for
* PDE validity in the L2 we avoid many expensive fixups in __get_user().
if ( !(l2_pgentry_val(linear_l2_table[addr>>L2_PAGETABLE_SHIFT]) &
_PAGE_PRESENT) ||
__get_user(pte, (unsigned long *)&linear_pg_table[addr>>PAGE_SHIFT]) )
+ {
+ UNLOCK_BIGLOCK(d);
return 0;
+ }
pfn = pte >> PAGE_SHIFT;
page = &frame_table[pfn];
/* We are looking only for read-only mappings of p.t. pages. */
if ( ((pte & (_PAGE_RW | _PAGE_PRESENT)) != _PAGE_PRESENT) ||
((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) )
+ {
+ UNLOCK_BIGLOCK(d);
return 0;
+ }
/* Get the L2 index at which this L1 p.t. is always mapped. */
l2_idx = page->u.inuse.type_info & PGT_va_mask;
if ( unlikely(l2_idx >= PGT_va_unknown) )
+ {
+ UNLOCK_BIGLOCK(d);
domain_crash(); /* Urk! This L1 is mapped in multiple L2 slots! */
+ }
l2_idx >>= PGT_va_shift;
/*
{
nl2e = mk_l2_pgentry(l2_pgentry_val(*pl2e) & ~_PAGE_PRESENT);
update_l2e(pl2e, *pl2e, nl2e);
+#if 0
flush_tlb(); /* XXX Multi-CPU guests? */
+#else
+ flush_tlb_all();
+#endif
}
/* Temporarily map the L1 page, and make a copy of it. */
/* Toss the writable pagetable state and crash. */
unmap_domain_mem(ptwr_info[cpu].ptinfo[which].pl1e);
ptwr_info[cpu].ptinfo[which].l1va = 0;
+ UNLOCK_BIGLOCK(d);
domain_crash();
}
+ UNLOCK_BIGLOCK(d);
+
/* Maybe fall through to shadow mode to propagate writable L1. */
return !current->mm.shadow_mode;
}
struct list_head *list_ent;
struct pfn_info *page;
- if ( d != current )
+ if ( d != current->domain )
domain_pause(d);
synchronise_pagetables(~0UL);
printk("pt base=%lx sh_info=%x\n",
- pagetable_val(d->mm.pagetable)>>PAGE_SHIFT,
+ pagetable_val(d->exec_domain[0]->mm.pagetable)>>PAGE_SHIFT,
virt_to_page(d->shared_info)-frame_table);
spin_lock(&d->page_alloc_lock);
/* PHASE 1 */
- adjust(&frame_table[pagetable_val(d->mm.pagetable)>>PAGE_SHIFT], -1, 1);
+ adjust(&frame_table[pagetable_val(d->exec_domain[0]->mm.pagetable)>>PAGE_SHIFT], -1, 1);
list_ent = d->page_list.next;
for ( i = 0; (list_ent != &d->page_list); i++ )
spin_unlock(&d->page_alloc_lock);
- adjust(&frame_table[pagetable_val(d->mm.pagetable)>>PAGE_SHIFT], 1, 1);
+ adjust(&frame_table[pagetable_val(d->exec_domain[0]->mm.pagetable)>>PAGE_SHIFT], 1, 1);
printk("Audit %d: Done. ctot=%d ttot=%d\n", d->id, ctot, ttot );
- if ( d != current )
+ if ( d != current->domain )
domain_unpause(d);
}
spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(frame_table[gpfn].u.inuse.domain->exec_domain[0]->mm.perdomain_pt) |
+ mk_l2_pgentry(__pa(frame_table[gpfn].u.inuse.domain->mm_perdomain_pt) |
__PAGE_HYPERVISOR);
#endif
struct domain *d = ed->domain;
extern int map_ldt_shadow_page(unsigned int);
int cpu = ed->processor;
+ int ret;
__asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );
unlikely((addr >> L2_PAGETABLE_SHIFT) ==
ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx) )
{
+ LOCK_BIGLOCK(d);
ptwr_flush(PTWR_PT_ACTIVE);
+ UNLOCK_BIGLOCK(d);
return;
}
(addr < PAGE_OFFSET) && shadow_fault(addr, error_code) )
return; /* Returns TRUE if fault was handled. */
- if ( unlikely(addr >= LDT_VIRT_START) &&
- (addr < (LDT_VIRT_START + (ed->mm.ldt_ents*LDT_ENTRY_SIZE))) )
+ if ( unlikely(addr >= LDT_VIRT_START(ed)) &&
+ (addr < (LDT_VIRT_START(ed) + (ed->mm.ldt_ents*LDT_ENTRY_SIZE))) )
{
/*
* Copy a mapping from the guest's LDT, if it is valid. Otherwise we
* send the fault up to the guest OS to be handled.
*/
- off = addr - LDT_VIRT_START;
+ LOCK_BIGLOCK(d);
+ off = addr - LDT_VIRT_START(ed);
addr = ed->mm.ldt_base + off;
- if ( likely(map_ldt_shadow_page(off >> PAGE_SHIFT)) )
+ ret = map_ldt_shadow_page(off >> PAGE_SHIFT);
+ UNLOCK_BIGLOCK(d);
+ if ( likely(ret) )
return; /* successfully copied the mapping */
}
trap_info_t cur;
trap_info_t *dst = current->thread.traps;
+ LOCK_BIGLOCK(current->domain);
+
for ( ; ; )
{
if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT;
traps++;
}
+ UNLOCK_BIGLOCK(current->domain);
+
return 0;
}
for ( i = 0; i < 16; i++ )
{
- if ( (pfn = l1_pgentry_to_pagenr(ed->mm.perdomain_pt[i])) != 0 )
+ if ( (pfn = l1_pgentry_to_pagenr(ed->mm.perdomain_ptes[i])) != 0 )
put_page_and_type(&frame_table[pfn]);
- ed->mm.perdomain_pt[i] = mk_l1_pgentry(0);
+ ed->mm.perdomain_ptes[i] = mk_l1_pgentry(0);
}
}
/* Install the new GDT. */
for ( i = 0; i < nr_pages; i++ )
- ed->mm.perdomain_pt[i] =
+ ed->mm.perdomain_ptes[i] =
mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);
- SET_GDT_ADDRESS(ed, GDT_VIRT_START);
+ SET_GDT_ADDRESS(ed, GDT_VIRT_START(ed));
SET_GDT_ENTRIES(ed, entries);
return 0;
if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
return -EFAULT;
+ LOCK_BIGLOCK(current->domain);
+
if ( (ret = set_gdt(current, frames, entries)) == 0 )
{
local_flush_tlb();
__asm__ __volatile__ ("lgdt %0" : "=m" (*current->mm.gdt));
}
+ UNLOCK_BIGLOCK(current->domain);
+
return ret;
}
{
unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT, d[2];
struct pfn_info *page;
+ struct exec_domain *ed;
long ret = -EINVAL;
d[0] = word1;
d[1] = word2;
- if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(d) )
+ LOCK_BIGLOCK(current->domain);
+
+ if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(d) ) {
+ UNLOCK_BIGLOCK(current->domain);
return -EINVAL;
+ }
page = &frame_table[pfn];
- if ( unlikely(!get_page(page, current->domain)) )
+ if ( unlikely(!get_page(page, current->domain)) ) {
+ UNLOCK_BIGLOCK(current->domain);
return -EINVAL;
+ }
/* Check if the given frame is in use in an unsafe context. */
switch ( page->u.inuse.type_info & PGT_type_mask )
{
case PGT_gdt_page:
/* Disallow updates of Xen-reserved descriptors in the current GDT. */
- if ( (l1_pgentry_to_pagenr(current->mm.perdomain_pt[0]) == pfn) &&
- (((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) &&
- (((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) )
- goto out;
+ for_each_exec_domain(current->domain, ed) {
+ if ( (l1_pgentry_to_pagenr(ed->mm.perdomain_ptes[0]) == pfn) &&
+ (((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) &&
+ (((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) )
+ goto out;
+ }
if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
goto out;
break;
out:
put_page(page);
+
+ UNLOCK_BIGLOCK(current->domain);
+
return ret;
}
/* Get base and check limit. */
if ( ldt )
{
- table = (unsigned long *)LDT_VIRT_START;
+ table = (unsigned long *)LDT_VIRT_START(d);
if ( idx >= d->mm.ldt_ents )
goto fail;
}
/* Get base and check limit. */
if ( ldt )
{
- table = (unsigned long *)LDT_VIRT_START;
+ table = (unsigned long *)LDT_VIRT_START(d);
if ( idx >= d->mm.ldt_ents )
{
DPRINTK("Segment %04x out of LDT range (%d)\n",
else if ( unlikely((d = find_domain_by_id(domid)) == NULL) )
return -ESRCH;
+ LOCK_BIGLOCK(d);
+
switch ( op )
{
case MEMOP_increase_reservation:
if ( unlikely(domid != DOMID_SELF) )
put_domain(d);
+ UNLOCK_BIGLOCK(d);
+
return rc;
}
spin_lock_init(&d->time_lock);
+ spin_lock_init(&d->big_lock);
+
spin_lock_init(&d->page_alloc_lock);
INIT_LIST_HEAD(&d->page_list);
INIT_LIST_HEAD(&d->xenpage_list);
void domain_destruct(struct domain *d)
{
struct domain **pd;
- struct exec_domain *ed;
atomic_t old, new;
if ( !test_bit(DF_DYING, &d->d_flags) )
destroy_event_channels(d);
grant_table_destroy(d);
- for_each_exec_domain(d, ed)
- free_perdomain_pt(ed);
+ free_perdomain_pt(d);
free_xenheap_page((unsigned long)d->shared_info);
free_domain_struct(d);
memcpy(&ed->thread, &idle0_exec_domain.thread, sizeof(ed->thread));
/* arch_do_createdomain */
- ed->mm.perdomain_pt = (l1_pgentry_t *)alloc_xenheap_page();
- memset(ed->mm.perdomain_pt, 0, PAGE_SIZE);
- machine_to_phys_mapping[virt_to_phys(ed->mm.perdomain_pt) >>
- PAGE_SHIFT] = INVALID_P2M_ENTRY;
+ ed->mm.perdomain_ptes = d->mm_perdomain_pt + (ed->eid << PDPT_VCPU_SHIFT);
sched_add_domain(ed);
if ( count > 512 )
return -EINVAL;
+ LOCK_BIGLOCK(current->domain);
+
switch ( cmd )
{
case GNTTABOP_map_grant_ref:
break;
}
+ UNLOCK_BIGLOCK(current->domain);
+
return rc;
}
/* Block the currently-executing domain until a pertinent event occurs. */
long do_block(void)
{
- ASSERT(current->id != IDLE_DOMAIN_ID);
+ ASSERT(current->domain->id != IDLE_DOMAIN_ID);
current->vcpu_info->evtchn_upcall_mask = 0;
set_bit(EDF_BLOCKED, ¤t->ed_flags);
TRACE_2D(TRC_SCHED_BLOCK, current->id, current);
task_slice_t next_slice;
s32 r_time; /* time for new dom to run */
- cleanup_writable_pagetable(
- prev->domain, PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE);
+ if ( !is_idle_task(current->domain) )
+ {
+ LOCK_BIGLOCK(current->domain);
+ cleanup_writable_pagetable(
+ prev->domain, PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE);
+ UNLOCK_BIGLOCK(current->domain);
+ }
perfc_incrc(sched_run);
extern unsigned long xenheap_phys_end; /* user-configurable */
#endif
-#define GDT_VIRT_START (PERDOMAIN_VIRT_START)
-#define GDT_VIRT_END (GDT_VIRT_START + (64*1024))
-#define LDT_VIRT_START (GDT_VIRT_END)
-#define LDT_VIRT_END (LDT_VIRT_START + (64*1024))
+#define GDT_VIRT_START(ed) (PERDOMAIN_VIRT_START + ((ed)->eid << PDPT_VCPU_VA_SHIFT))
+#define GDT_VIRT_END(ed) (GDT_VIRT_START(ed) + (64*1024))
+#define LDT_VIRT_START(ed) (PERDOMAIN_VIRT_START + (64*1024) + ((ed)->eid << PDPT_VCPU_VA_SHIFT))
+#define LDT_VIRT_END(ed) (LDT_VIRT_START(ed) + (64*1024))
+
+#define PDPT_VCPU_SHIFT 5
+#define PDPT_VCPU_VA_SHIFT (PDPT_VCPU_SHIFT + PAGE_SHIFT)
#if defined(__x86_64__)
#define ELFSIZE 64
extern int arch_final_setup_guestos(
struct exec_domain *d, full_execution_context_t *c);
-extern void free_perdomain_pt(struct exec_domain *d);
+extern void free_perdomain_pt(struct domain *d);
extern void domain_relinquish_memory(struct domain *d);
{
cpu = smp_processor_id();
desc = (struct desc_struct *)GET_GDT_ADDRESS(p) + __LDT(cpu);
- desc->a = ((LDT_VIRT_START&0xffff)<<16) | (ents*8-1);
- desc->b = (LDT_VIRT_START&(0xff<<24)) | 0x8200 |
- ((LDT_VIRT_START&0xff0000)>>16);
+ desc->a = ((LDT_VIRT_START(p)&0xffff)<<16) | (ents*8-1);
+ desc->b = (LDT_VIRT_START(p)&(0xff<<24)) | 0x8200 |
+ ((LDT_VIRT_START(p)&0xff0000)>>16);
__asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) );
}
}
* Every domain has a L1 pagetable of its own. Per-domain mappings
* are put in this table (eg. the current GDT is mapped here).
*/
- l1_pgentry_t *perdomain_pt;
+ l1_pgentry_t *perdomain_ptes;
pagetable_t pagetable;
/* shadow mode status and controls */
#define IDLE0_MM \
{ \
- perdomain_pt: 0, \
- pagetable: mk_pagetable(__pa(idle_pg_table)) \
+ perdomain_ptes: 0, \
+ pagetable: mk_pagetable(__pa(idle_pg_table)) \
}
/* Convenient accessor for mm.gdt. */
};
+#if 01
+#define LOCK_BIGLOCK(_d) spin_lock(&(_d)->big_lock)
+#define UNLOCK_BIGLOCK(_d) spin_unlock(&(_d)->big_lock)
+#else
+#define LOCK_BIGLOCK(_d) (void)(_d)
+#define UNLOCK_BIGLOCK(_d)
+#endif
+
struct domain {
domid_t id;
s_time_t create_time;
shared_info_t *shared_info; /* shared data area */
spinlock_t time_lock;
+ spinlock_t big_lock;
+
+ l1_pgentry_t *mm_perdomain_pt;
+
spinlock_t page_alloc_lock; /* protects all the following fields */
struct list_head page_list; /* linked list, of size tot_pages */
struct list_head xenpage_list; /* linked list, of size xenheap_pages */